* Stata do-file, Robustness tests for Chapter 8, Mark R. Beissinger, 
*    The Revolutionary City  
* Session setup: pin the interpreter version, empty the workspace, switch off
*   output paging, and open the log for this run.
version 14
clear all
set more off
* Close any log that is still open (for example after an aborted earlier run)
*   so that -log using- below does not stop with a "log file already open"
*   error. -capture- suppresses the error raised when no log is open.
capture log close
* Forward slashes are accepted as directory separators by Stata on Windows,
*   macOS and Unix, so the do-file is not tied to Windows path syntax.
log using "Robustnesstestfiles/Logfiles/robustnesstestschapter8.log", replace
* ============================================================================
* ROBUSTNESS CHECKS FOR STATISTICAL RESULTS APPEARING IN CHAPTER 8
* STATA  
* Robustness checks for results reported in Chapter 8  
* Author: Mark R. Beissinger  
* Date:  January 2022  
* Princeton, NJ 
* =============================================================================
* BEFORE RUNNING, YOU MUST SET THE DEFAULT PATH FOR WHERE THE DATA
*   FILES RESIDE
* =============================================================================
* Before running, download the following packages for STATA:
*	switchcopula from http://www.stata-journal.com/software/sj13-3
*	collin from https://stats.oarc.ucla.edu/stata/ado/analysis/
* ============================================================================
* The following datafiles are used in this file:
*   Data set of revolutionary episodes--revolutionaryeps.dta
* =============================================================================
* The following files are produced by these robustness tests: 
*	Robustnesstestfiles\Logfiles\robustnesstestschapter8.log
*
*	The following graphs were produced by these tests:
*		Robustnesstestfiles\Logfiles\robch8_scat1.pdf
*		Robustnesstestfiles\Logfiles\robch8_scat2.pdf
*		Robustnesstestfiles\Logfiles\robch8_scat3.pdf
*		Robustnesstestfiles\Logfiles\robch8_scat4.pdf
*	They were added to the end of the pdf output files for the robustness
*		tests for the chapter.
*	The tests of the selection portion of the model additionally export:
*		Robustnesstestfiles\Logfiles\robch8_lroc.pdf
* =============================================================================

* Load the data set of revolutionary episodes (resolved against the current
*   working directory).
use revolutionaryeps.dta

* =====================================================================
* ROBUSTNESS TESTS OF ENDOGENOUS SWITCHING MODEL, Model 3 in Table 8.2
* =====================================================================
* Bootstrapped standard errors: 1000 replications with a fixed seed, followed
*   by the bias-corrected bootstrap report.
* NOTE: THIS OPERATION CAN TAKE A WHILE TO EXECUTE
bootstrap, reps(1000) seed(1234): ///
    switchcopula ///
        (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
        (lndeaths = lnmonthsdur urbpercbefrev success) ///
        if startyear>1899, ///
        select(civilwar = urbandum leftist ethnicorder) ///
        copula0(clayton) copula1(fgm) ///
        margin1(normal) margin0(normal) margsel(normal) iterate(75)
* Bias-corrected bootstrap confidence intervals
estat bootstrap, bc
*	RESULT:  All results remained statistically significant, no sign shifts.

* Identification of potential outliers
* Refit Model 3 quietly, then store the predictions requested with the xb0,
*   xb1 and cll options of -predict- for use in the diagnostic plots below.
quietly switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success) ///
    if startyear>1899, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
predict xb0, xb0
predict xb1, xb1
predict cll, cll
* lndeaths against xb0 for episodes with civilwar==0, labelled by revid.
* -twoway lfit- takes the y variable first: the line is fitted as lndeaths on
*   xb0 so that it is drawn on the same axes as the scatter (y = lndeaths,
*   x = xb0).
scatter lndeaths xb0 if civilwar==0 & e(sample), mlab(revid) || ///
    lfit lndeaths xb0 if civilwar==0 & e(sample)
graph export "Robustnesstestfiles/Logfiles/robch8_scat1.pdf", replace
* Potential outliers:  revid 216, 1991 Uprisings in Iraq
* Same plot for episodes with civilwar==1, using xb1.
scatter lndeaths xb1 if civilwar==1 & e(sample), mlab(revid) || ///
    lfit lndeaths xb1 if civilwar==1 & e(sample)
graph export "Robustnesstestfiles/Logfiles/robch8_scat2.pdf", replace
* Potential outliers:  revid 195, Togo 1991 Revolution
*					  revid 38, Chinese Civil War Part 1
*					  revid 359, Tunisian independence movement
*					  revid 367, Second Malayan Emergency
*   				  revid 163, Chinese Civil War Part 2
* cll against xb0 for episodes with civilwar==0
scatter cll xb0 if e(sample) & civilwar==0, mlab(revid)
graph export "Robustnesstestfiles/Logfiles/robch8_scat3.pdf", replace
* Potential outliers:  revid 384, Bajram Currie Revolt in 1922
* cll against xb1 for episodes with civilwar==1
scatter cll xb1 if e(sample) & civilwar==1, mlab(revid)
graph export "Robustnesstestfiles/Logfiles/robch8_scat4.pdf", replace
* Potential outliers:  revid 38, Chinese Civil War Part 1
* Regression without outliers: drop every revid flagged above.
*   !inlist() keeps the same observations as the chain of revid~= tests
*   (including any observation with missing revid).
switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success) ///
    if startyear>1899 & !inlist(revid, 216, 195, 38, 359, 367, 163, 384), ///
    select(civilwar = urbandum leftist ethnicorder) ///
    copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
*	RESULT:  All variables significant at the .05 level or better. No signs changed.
* The stored predictions are no longer needed.
drop xb0 xb1 cll
* Excluded ongoing episodes
switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success) ///
    if startyear>1899 & ongoing==0, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
*	RESULT:  All variables significant at the .05 level or better. No signs changed.


* +++++++++++++++++++++++++++++++++++++++++++
* GLM ESTIMATIONS--CIVIL WAR PORTION OF MODEL
* +++++++++++++++++++++++++++++++++++++++++++

* ===================================================================================
* GLM: ESTIMATED CHANGE IN DEATHS DUE TO SHORTENED CIVIL WARS IN POST-COLD WAR PERIOD
*		Model 3 in Table 8.2
* ===================================================================================
* Refit the switching regression quietly so that e(sample) marks its estimation
*   sample, then fit the gamma/log-link GLM on the civil-war episodes in it.
quietly switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success) ///
    if startyear>1899, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
glm totaldeaths lnmonthsdur urbpercbefrev success ///
    if civilwar==1 & startyear>1899 & e(sample), family(gamma) link(log)
* Mean of lnmonthsdur by time period; r(Stat2) and r(Stat3) hold the results
*   for the second and third by-groups of timeperiods.
tabstat lnmonthsdur if civilwar==1 & startyear>1899, s(mean) by(timeperiods) save
mat total1 = r(Stat2)
mat total2 = r(Stat3)
local newtot1 = total1[1,1]
display `newtot1'
local newtot2 = total2[1,1]
display `newtot2'
* Copy the two period means into the macros handed to -margins-. No further
*   transformation is applied here: the means are taken of lnmonthsdur itself.
local dur1 = `newtot1'
local dur2 = `newtot2'
* Calculate marginal effects for average durations for each period
margins, atmeans at(lnmonthsdur=(`dur1' `dur2')) post
* Calculate difference between marginal effects for each period
*   (elements 1 and 2 of the r(b) row vector left by -margins-)
scalar m1 = el(r(b),1,1)
scalar m2 = el(r(b),1,2)
scalar mdiff = m2 - m1
* Show the difference, as the other GLM sections of this file do
display mdiff
* Calculate effect: Multiply effect times number of civil wars in post-Cold War period
*   tper[3,2] is row 3, column 2 of the timeperiods-by-civilwar count matrix
*   (presumably the post-Cold War period and civilwar==1 -- confirm coding).
tab timeperiods civilwar if startyear>1899, matcell(tper)
scalar cwnum = tper[3,2]
display cwnum
display mdiff * cwnum
* Drop scalars and macros
macro drop _all
scalar drop _all

* ============================================================================
* GLM: ESTIMATED CHANGE IN DEATHS DUE TO URBANIZATION IN POST-COLD WAR PERIOD
*		Model 3 in Table 8.2
* ============================================================================
* Step 1: refit the switching regression silently so e(sample) marks its
*   estimation sample; fit the gamma/log-link GLM on civil-war episodes in it.
quietly switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success) ///
    if startyear>1899, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
glm totaldeaths lnmonthsdur urbpercbefrev success ///
    if civilwar==1 & startyear>1899 & e(sample), family(gamma) link(log)
* Step 2: mean of urbpercbefrev by time period; the second and third
*   by-groups of timeperiods are picked up from r(Stat2) and r(Stat3).
tabstat urbpercbefrev if civilwar==1 & startyear>1899, statistics(mean) by(timeperiods) save
matrix total1 = r(Stat2)
matrix total2 = r(Stat3)
local urbmean_a = total1[1,1]
display `urbmean_a'
local urbmean_b = total2[1,1]
display `urbmean_b'
* Step 3: values at which -margins- evaluates urbpercbefrev
local urb_at_a = `urbmean_a'
local urb_at_b = `urbmean_b'
* Step 4: predictive margins at the two period means, other covariates at
*   their means
margins, atmeans at(urbpercbefrev=(`urb_at_a' `urb_at_b')) post
* Step 5: difference between the two margins (elements 1 and 2 of r(b))
scalar m1 = el(r(b),1,1)
scalar m2 = el(r(b),1,2)
scalar mdiff = scalar(m2) - scalar(m1)
display scalar(mdiff)
* Step 6: scale by the count in row 3, column 2 of the timeperiods-by-civilwar
*   table (presumably post-Cold War civil wars -- confirm coding)
tabulate timeperiods civilwar if startyear>1899, matcell(tper)
scalar cwnum = tper[3,2]
display scalar(cwnum)
display scalar(mdiff) * scalar(cwnum)
* Clean up all macros and scalars before the next section
macro drop _all
scalar drop _all

* =================================================================================================
* GLM: ESTIMATED CHANGE IN DEATHS DUE TO CHANGING RATES OF SUCCESS IN CIVIL WAR 
*		IN POST-COLD WAR PERIOD, Model 3 in Table 8.2
* =================================================================================================
* Refit the switching regression quietly so that e(sample) marks its estimation
*   sample, then fit the gamma/log-link GLM on the civil-war episodes in it.
quietly switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success) ///
    if startyear>1899, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
glm totaldeaths lnmonthsdur urbpercbefrev success ///
    if civilwar==1 & startyear>1899 & e(sample), family(gamma) link(log)
* Calculate marginal effects for successful and failed revolutionary civil wars
margins, atmeans at(success=(0 1)) post
* Calculate difference between marginal effects for failed and successful revolutionary civil wars
*   (elements 1 and 2 of the r(b) row vector left by -margins-)
scalar m1 = el(r(b),1,1)
scalar m2 = el(r(b),1,2)
scalar mdiff = m2 - m1
display mdiff
* Calculate difference in number of successes for each period.
*   The full variable name timeperiods is used, as in the rest of this file,
*   so the command does not depend on variable-name abbreviation being on.
*   Column 2 of civsuc is the second category of success; rows 2 and 3 are the
*   second and third categories of timeperiods.
tab timeperiods success if civilwar==1 & startyear>1899, matcell(civsuc)
local cwnum2 = civsuc[2,2]
display `cwnum2'
local cwnum3 = civsuc[3,2]
display `cwnum3'
local cwnum4 = `cwnum3' - `cwnum2'
display `cwnum4'
* Calculate effect:  Multiply difference in number of successes by difference in marginal effects
display mdiff * `cwnum4'
* Drop scalars and macros
macro drop _all
scalar drop _all

* ========================================================================================
* GLM: ESTIMATED CHANGE IN DEATHS DUE TO CHANGING POLITY SCORES IN POST-COLD WAR PERIOD, 
*		Model 4 in Table 8.2
* ========================================================================================
* Step 1: refit the switching regression (newpolitymin1 added to the second
*   outcome equation) silently so e(sample) marks its estimation sample; fit
*   the gamma/log-link GLM on civil-war episodes in it.
quietly switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success newpolitymin1) ///
    if startyear>1899, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
glm totaldeaths lnmonthsdur urbpercbefrev success newpolitymin1 ///
    if civilwar==1 & startyear>1899 & e(sample), family(gamma) link(log)
* Step 2: mean of newpolitymin1 among civil-war episodes by time period; the
*   second and third by-groups are read from r(Stat2) and r(Stat3).
tabstat newpolitymin1 if civilwar==1 & startyear>1899, statistics(mean) by(timeperiods) save
matrix total1 = r(Stat2)
matrix total2 = r(Stat3)
local polity_a = total1[1,1]
display `polity_a'
local polity_b = total2[1,1]
display `polity_b'
* Step 3: predictive margins at the two period means, other covariates at
*   their means
margins, atmeans at(newpolitymin1=(`polity_a' `polity_b')) post
* Step 4: difference between the two margins (elements 1 and 2 of r(b))
scalar m1 = el(r(b),1,1)
scalar m2 = el(r(b),1,2)
scalar mdiff = scalar(m2) - scalar(m1)
display scalar(mdiff)
* Step 5: scale by the count in row 3, column 2 of the timeperiods-by-civilwar
*   table (presumably post-Cold War civil wars -- confirm coding)
tabulate timeperiods civilwar if startyear>1899, matcell(tper)
scalar cwnum = tper[3,2]
display scalar(cwnum)
display scalar(mdiff) * scalar(cwnum)
* Clean up all macros and scalars before the next section
macro drop _all
scalar drop _all


* ==============================================================================
* GLM: ESTIMATED CHANGE IN DEATHS DUE TO POPULATION SIZE IN POST-COLD WAR PERIOD
* ==============================================================================
* Step 1: refit the switching regression (lnpop added to the second outcome
*   equation) silently so e(sample) marks its estimation sample; fit the
*   gamma/log-link GLM on civil-war episodes in it.
quietly switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success lnpop) ///
    if startyear>1899, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
glm totaldeaths lnmonthsdur urbpercbefrev success lnpop ///
    if civilwar==1 & startyear>1899 & e(sample), family(gamma) link(log)
* Step 2: mean of lnpop among civil-war episodes by time period; the second
*   and third by-groups are read from r(Stat2) and r(Stat3).
tabstat lnpop if civilwar==1 & startyear>1899, statistics(mean) by(timeperiods) save
matrix total1 = r(Stat2)
matrix total2 = r(Stat3)
local lnpop_a = total1[1,1]
display `lnpop_a'
local lnpop_b = total2[1,1]
display `lnpop_b'
* Step 3: predictive margins at the two period means, other covariates at
*   their means
margins, atmeans at(lnpop=(`lnpop_a' `lnpop_b')) post
* Step 4: difference between the two margins (elements 1 and 2 of r(b))
scalar m1 = el(r(b),1,1)
scalar m2 = el(r(b),1,2)
scalar mdiff = scalar(m2) - scalar(m1)
display scalar(mdiff)
* Step 5: scale by the count in row 3, column 2 of the timeperiods-by-civilwar
*   table (presumably post-Cold War civil wars -- confirm coding)
tabulate timeperiods civilwar if startyear>1899, matcell(tper)
scalar cwnum = tper[3,2]
display scalar(cwnum)
display scalar(mdiff) * scalar(cwnum)
* Clean up all macros and scalars before the next section
macro drop _all
scalar drop _all

* ======================================================================
* ROBUSTNESS TESTS FOR SELECTION PORTION OF MODEL, Model 3 in Table 8.3
* ======================================================================
* Robust standard errors
* Refit the switching regression quietly so that e(sample) marks its estimation
*   sample, then fit the selection equation as a probit on that sample.
quietly switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success) ///
    if startyear>1899, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
probit civilwar urbandum leftist ethnicorder if startyear>1899 & e(sample), vce(robust)
*	RESULT:  All variables remain statistically significant at the .05 level or better
* Area under the curve
lroc
* Forward slashes keep the export path usable on Windows, macOS and Unix.
graph export "Robustnesstestfiles/Logfiles/robch8_lroc.pdf", replace
*	RESULT:  Explains .9265 of area under the curve
* Classification capacity
estat classification
*	RESULT:  Model properly classifies 86.73 percent of cases
* Testing for collinearity
collin urbandum leftist ethnicorder if startyear>1899 & e(sample)
*	RESULT:  tolerances are all > .5
* Bootstrapped standard errors
quietly switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success) ///
    if startyear>1899, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
* Freeze the switching model's estimation sample in a 0/1 variable, since the
*   bootstrapped probit below is restricted through a variable rather than
*   through e(sample).
generate byte sample = e(sample)
bootstrap, reps(1000) seed(1234): probit civilwar urbandum leftist ethnicorder if startyear>1899 & sample==1
* Result: all variables remain significant at the .05 level or better.
drop sample


* ++++++++++++++++++++++++++++++++++++++
* GLM ESTIMATES FOR NO CIVIL WAR REGIME
* ++++++++++++++++++++++++++++++++++++++

* =======================================================================
* GLM: ESTIMATED CHANGE IN DEATHS DUE TO CHANGING LOCATIONS FOR NO CIVIL 
*		WAR REGIME, Model 5 in Table 8.4
* =======================================================================
* Refit the switching regression quietly so that e(sample) marks its estimation
*   sample, then fit the gamma/log-link GLM on the episodes without civil war
*   in it.
quietly switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success) ///
    if startyear>1899, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
glm totaldeaths success newpolitymin1 urbancivic newgdppcthl urbandum ///
    if civilwar==0 & startyear>1899 & e(sample), family(gamma) link(log)
* Calculate marginal effects for urban location
margins, atmeans at(urbandum=(0 1)) post
* Calculate difference between marginal effects for urban and rural episodes without civil wars
*   (elements 1 and 2 of the r(b) row vector left by -margins-)
scalar m1 = el(r(b),1,1)
scalar m2 = el(r(b),1,2)
scalar mdiff = m2 - m1
display mdiff
* Calculate difference in number of urban episodes without civil wars for each period.
*   The full variable name timeperiods is used, as in the rest of this file,
*   so the command does not depend on variable-name abbreviation being on.
*   Column 2 of civsuc is the second category of urbandum; rows 1 and 3 are
*   the first and third categories of timeperiods.
tab timeperiods urbandum if civilwar==0 & startyear>1899, matcell(civsuc)
local cwnum1 = civsuc[1,2]
display `cwnum1'
local cwnum3 = civsuc[3,2]
display `cwnum3'
local cwnum4 = `cwnum3' - `cwnum1'
display `cwnum4'
* Calculate effect:  Multiply difference in number of urban episodes without civil wars by difference in marginal effects
display mdiff * `cwnum4'
* Drop scalars and macros
macro drop _all
scalar drop _all

* ===================================================================
* GLM: ESTIMATED CHANGE IN DEATHS DUE TO CHANGING GDP PER CAPITA FOR 
*		NO CIVIL WAR REGIME
* ===================================================================
* Step 1: refit the switching regression silently so e(sample) marks its
*   estimation sample; silently fit the gamma/log-link GLM on the episodes
*   without civil war in it.
quietly switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success) ///
    if startyear>1899, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    vce(robust) copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
quietly glm totaldeaths success newpolitymin1 urbancivic newgdppcthl urbandum ///
    if civilwar==0 & startyear>1899 & e(sample), family(gamma) link(log)
* Step 2: mean of newgdppcthl among episodes without civil war by time
*   period; the first and third by-groups are read from r(Stat1) and r(Stat3).
tabstat newgdppcthl if civilwar==0 & startyear>1899, statistics(mean) by(timeperiods) save
matrix total1 = r(Stat1)
matrix total3 = r(Stat3)
local gdp_first = total1[1,1]
display `gdp_first'
local gdp_third = total3[1,1]
display `gdp_third'
* Step 3: values at which -margins- evaluates newgdppcthl
local gdp_at_first = `gdp_first'
local gdp_at_third = `gdp_third'
* Step 4: predictive margins at the two period means of GDP per capita,
*   other covariates at their means, over the civilwar==0 subpopulation
margins, atmeans at(newgdppcthl=(`gdp_at_first' `gdp_at_third')) subpop(if civilwar==0)
* Step 5: keep the two margins (elements 1 and 2 of r(b))
scalar m1 = el(r(b),1,1)
scalar m3 = el(r(b),1,2)
* Step 6: weight each margin by the count in column 1 of the
*   timeperiods-by-civilwar table for rows 1 and 3 (presumably the episodes
*   without civil war in each period -- confirm coding) and difference them
tabulate timeperiods civilwar if startyear>1899, matcell(tper)
scalar ncwnum1 = tper[1,1]
scalar ncwnum3 = tper[3,1]
display scalar(ncwnum1)
display scalar(ncwnum3)
scalar effper1 = scalar(m1) * scalar(ncwnum1)
scalar effper3 = scalar(m3) * scalar(ncwnum3)
display scalar(effper3) - scalar(effper1)
* Clean up all macros and scalars before the next section
macro drop _all
scalar drop _all

* ==========================================================================
* GLM: ESTIMATED CHANGE IN DEATHS DUE TO URBAN CIVIC REPERTOIRE IN EPISODES 
*		WITH NO CIVIL WAR 
* ==========================================================================
* Refit the switching regression quietly so that e(sample) marks its estimation
*   sample, then quietly fit the gamma/log-link GLM on the episodes without
*   civil war in it.
quietly switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success) ///
    if startyear>1899, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
quietly glm totaldeaths success newpolitymin1 urbancivic newgdppcthl urbandum ///
    if civilwar==0 & startyear>1899 & e(sample), family(gamma) link(log)
* Calculate marginal effects for urban civic
margins, atmeans at(urbancivic=(0 1)) post
* Calculate difference between marginal effects for urban civic without civil wars
*   (elements 1 and 2 of the r(b) row vector left by -margins-)
scalar m1 = el(r(b),1,1)
scalar m2 = el(r(b),1,2)
scalar mdiff = m2 - m1
display mdiff
* Calculate difference in number of urban civic episodes without civil wars for each period.
*   The full variable name timeperiods is used, as in the rest of this file,
*   so the command does not depend on variable-name abbreviation being on.
*   Column 2 of civsuc is the second category of urbancivic; rows 1 and 3 are
*   the first and third categories of timeperiods.
tab timeperiods urbancivic if civilwar==0 & startyear>1899, matcell(civsuc)
local cwnum1 = civsuc[1,2]
display `cwnum1'
local cwnum3 = civsuc[3,2]
display `cwnum3'
local cwnum4 = `cwnum3' - `cwnum1'
display `cwnum4'
* Calculate effect:  Multiply difference in number of urban civic episodes without civil wars by difference in marginal effects
display mdiff * `cwnum4'
* Drop scalars and macros
macro drop _all
scalar drop _all

* ===============================================================
* GLM: ESTIMATED CHANGE IN DEATHS DUE TO CHANGE IN FREQUENCY OF 
*		OPPOSITION SUCCESS IN EPISODES WITH NO CIVIL WAR 
* ===============================================================
* Refit the switching regression quietly so that e(sample) marks its estimation
*   sample, then quietly fit the gamma/log-link GLM on the episodes without
*   civil war in it.
quietly switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success) ///
    if startyear>1899, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    vce(robust) copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
quietly glm totaldeaths success newpolitymin1 urbancivic newgdppcthl urbandum ///
    if civilwar==0 & startyear>1899 & e(sample), family(gamma) link(log)
* Calculate marginal effects for opposition success for revolutionary episodes
margins, atmeans at(success=(0 1)) post
* Calculate difference between marginal effects for successful episodes without civil wars
*   (elements 1 and 2 of the r(b) row vector left by -margins-)
scalar m1 = el(r(b),1,1)
scalar m2 = el(r(b),1,2)
scalar mdiff = m2 - m1
display mdiff
* Calculate difference in number of successful episodes without civil wars for each period.
*   The full variable name timeperiods is used, as in the rest of this file,
*   so the command does not depend on variable-name abbreviation being on.
*   Column 2 of civsuc is the second category of success; rows 1 and 3 are
*   the first and third categories of timeperiods.
tab timeperiods success if civilwar==0 & startyear>1899, matcell(civsuc)
local cwnum1 = civsuc[1,2]
display `cwnum1'
local cwnum3 = civsuc[3,2]
display `cwnum3'
local cwnum4 = `cwnum3' - `cwnum1'
display `cwnum4'
* Calculate effect:  Multiply difference in number of successful episodes without civil wars by difference in marginal effects
display mdiff * `cwnum4'
* Drop scalars and macros
macro drop _all
scalar drop _all

* ========================================================================
* GLM ESTIMATION: ESTIMATED EFFECT ON DEATHS OF POLITY SCORES IN EPISODES 
*		WITHOUT CIVIL WARS, 1985-2014 vs. 1900-1949 
* ========================================================================
* Step 1: refit the full switching model silently so e(sample) marks its
*   estimation sample; silently fit the gamma/log-link GLM on the episodes
*   without civil war in it.
quietly switchcopula ///
    (lndeaths = success newpolitymin1 urbancivic newgdppcthl urbandum) ///
    (lndeaths = lnmonthsdur urbpercbefrev success) ///
    if startyear>1899, ///
    select(civilwar = urbandum leftist ethnicorder) ///
    vce(robust) copula0(clayton) copula1(fgm) ///
    margin1(normal) margin0(normal) margsel(normal)
quietly glm totaldeaths success newpolitymin1 urbancivic newgdppcthl urbandum ///
    if civilwar==0 & startyear>1899 & e(sample), family(gamma) link(log)
* Step 2: mean of newpolitymin1 among episodes without civil war by time
*   period; the first and third by-groups are read from r(Stat1) and r(Stat3).
tabstat newpolitymin1 if civilwar==0 & startyear>1899, statistics(mean) by(timeperiods) save
matrix total1 = r(Stat1)
matrix total3 = r(Stat3)
local polity_first = total1[1,1]
display `polity_first'
local polity_third = total3[1,1]
display `polity_third'
* Step 3: values at which -margins- evaluates newpolitymin1
local polity_at_first = `polity_first'
local polity_at_third = `polity_third'
* Step 4: predictive margins at the two period means of the Polity score,
*   other covariates at their means, over the civilwar==0 subpopulation
margins, atmeans at(newpolitymin1=(`polity_at_first' `polity_at_third')) subpop(if civilwar==0)
* Step 5: keep the two margins (elements 1 and 2 of r(b))
scalar m1 = el(r(b),1,1)
scalar m3 = el(r(b),1,2)
* Step 6: weight each margin by the count in column 1 of the
*   timeperiods-by-civilwar table for rows 1 and 3 (presumably the episodes
*   without civil war in each period -- confirm coding) and difference them
tabulate timeperiods civilwar if startyear>1899, matcell(tper)
scalar ncwnum1 = tper[1,1]
scalar ncwnum3 = tper[3,1]
display scalar(ncwnum1)
display scalar(ncwnum3)
scalar effper1 = scalar(m1) * scalar(ncwnum1)
scalar effper3 = scalar(m3) * scalar(ncwnum3)
display scalar(effper3) - scalar(effper1)
* Clean up all macros and scalars
macro drop _all
scalar drop _all



* Close the log opened at the top of this do-file.
log close

* Remove the data set from memory.
clear
